
# This file creates data for parents who had children of same sex in first two parities --------------------

# install.packages("foreign")
# install.packages("dplyr")
# install.packages("date")

library(foreign)
library(dplyr)
library(date)

# Read the Stata extract that holds one row per person.
raw_data <- read.dta(file = "stata_data.dta")

# Numeric copy of the birth date, used for sorting and date arithmetic below.
# NOTE(review): presumably FOED_DAG is read in as an R Date, so this is a day
# count since 1970-01-01 -- confirm against the data.
raw_data[["date"]] <- as.numeric(raw_data[["FOED_DAG"]])

# Function for deciding birth order.
#
# Takes the birth dates of one parent's children, sorted ascending, and
# returns each child's birth order. A child normally gets its position in the
# sequence; a child born fewer than 8 days after the preceding child is
# treated as part of the same delivery and inherits that sibling's birth
# order. Positions are not renumbered after a multiple birth, e.g. a
# singleton, twins and another singleton give 1, 2, 2, 4.
#
# date: numeric vector of birth dates measured in days.
# Returns an integer vector with one element per element of `date`; an empty
# input gives an empty result.
# Stops with an error if a gap between consecutive dates is missing.
birth_order_func <-
  function(date) {
    n_children <- length(date)
    if (n_children == 0L) {
      return(integer(0))
    }

    gaps <- diff(date)
    if (anyNA(gaps)) {
      stop("missing value in `date`; cannot determine birth order",
           call. = FALSE)
    }

    # The first child always opens a delivery; later children do so only when
    # at least 8 days separate them from the previous child.
    starts_new_birth <- c(TRUE, gaps >= 8)
    position <- seq_len(n_children)

    # Carry the position of the most recent delivery start forward to the
    # children that belong to the same delivery.
    cummax(ifelse(starts_new_birth, position, 0L))
  }

# Children with a known mother: sort by birth date, assign birth order within
# each mother, then count how many children share a delivery.
twin_mom <-
  raw_data %>%
  filter(mor_id != "") %>%
  group_by(mor_id) %>%
  arrange(date) %>%
  mutate(birth_order = birth_order_func(date)) %>%
  # Children of the same mother with the same birth order form one delivery;
  # `twin` is the size of that delivery (1 = singleton, > 1 = multiple birth).
  group_by(mor_id, birth_order) %>%
  mutate(twin = n())

# Children with a known father. Two sources are stacked:
#   1. children with both parent ids, taken from twin_mom (birth order and
#      twin status as already computed within mother);
#   2. children with a father id but no mother id, for whom birth order and
#      twin status are computed within father.
# NOTE(review): for source 1 the birth order is relative to the mother's
# children, and for source 2 it is relative only to the father's children
# with a missing mother id. A father with children in more than one of these
# groups can therefore have several children with the same birth order --
# confirm this is intended.

# Children of known mothers who also have a father id
twin_dad <-
  twin_mom %>%
  filter(far_id != "")

# Birth order and twins among children with missing mother id
twin_dad_2 <-
  raw_data %>%
  filter(mor_id == "" & far_id != "") %>%
  group_by(far_id) %>%
  arrange(date) %>%
  mutate(birth_order = birth_order_func(date))

twin_dad_2 <-
  twin_dad_2 %>%
  group_by(far_id, birth_order) %>%
  mutate(twin = length(date))

# Bind the two father frames together. The inputs are grouped by different
# keys, so drop the grouping first; otherwise the combined frame would carry
# the first frame's group metadata, which does not describe the stacked rows.
twin_dad <-
  bind_rows(ungroup(twin_dad), ungroup(twin_dad_2))


# Flag children who belong to a multiple birth at the first or at the second
# birth order. The same two indicator columns are added to both frames.
flag_twin_parities <- function(children) {
  children <- as.data.frame(children)
  is_multiple <- children$twin > 1
  children$twin_first <- is_multiple & children$birth_order == 1
  children$twin_second <- is_multiple & children$birth_order == 2
  children
}

twin_mom <- flag_twin_parities(twin_mom)
twin_dad <- flag_twin_parities(twin_dad)

# Cut-off dates as day counts since 1970-01-01 (the numeric value of an R
# Date). The literal each expression evaluates to is given alongside.

# Birth-date thresholds (born after Jan 1, 1984 or Jan 1, 1988)
date09 <- as.numeric(as.Date("1984-01-01"))  # 5113
date13 <- as.numeric(as.Date("1988-01-01"))  # 6574

# Election cut-offs used when counting children born before each election
date09_election <- as.numeric(as.Date("2009-11-17"))  # 14565
date13_election <- as.numeric(as.Date("2013-11-19"))  # 16028
# NOTE(review): this value resolves to 2014-05-26; confirm it is the intended
# cut-off for the 2014 election.
date14_election <- as.numeric(as.Date("2014-05-26"))  # 16216
  
# One row per mother (pnr = mother's id), summarising her children:
#   twin_first / twin_second   number of children in a multiple birth at
#                              birth order 1 / 2
#   no_children_09/13/14       children born before each election cut-off
#   age_oldest                 earliest birth date among her children
#   first_female/second_female number of girls at birth order 1 / 2
#   first_two_same             whether those two counts are equal
twin_mom_agg <-
  twin_mom %>%
  group_by(pnr = mor_id) %>%
  summarise(
    twin_first     = sum(twin_first),
    twin_second    = sum(twin_second),
    no_children_09 = sum(date < date09_election),
    no_children_13 = sum(date < date13_election),
    no_children_14 = sum(date < date14_election),
    age_oldest     = min(date),
    first_female   = sum(birth_order == 1 & female == 1),
    second_female  = sum(birth_order == 2 & female == 1)
  ) %>%
  # drop mothers with a multiple birth in either of the first two parities
  filter(twin_first == 0, twin_second == 0) %>%
  mutate(first_two_same = first_female == second_female)

# Women in the data: keep id, turnout variables and own birth date, then
# attach the child summary for those whose pnr appears as a mother id.
# Women without a matching row in twin_mom_agg keep NA in the joined columns.
mothers <-
  twin_mom %>%
  filter(female == 1) %>%
  select(pnr, stemte_2009, stemt, ep_stemt, date) %>%
  left_join(twin_mom_agg, by = "pnr")

# Fathers: same aggregation and merge as for mothers -------------------------

# One row per father (pnr = father's id) with the same child summary
# variables as the mother aggregate.
twin_dad_agg <-
  as.data.frame(twin_dad) %>%
  group_by(pnr = far_id) %>%
  summarise(
    twin_first     = sum(twin_first),
    twin_second    = sum(twin_second),
    no_children_09 = sum(date < date09_election),
    no_children_13 = sum(date < date13_election),
    no_children_14 = sum(date < date14_election),
    age_oldest     = min(date),
    first_female   = sum(birth_order == 1 & female == 1),
    second_female  = sum(birth_order == 2 & female == 1)
  ) %>%
  # drop fathers with a multiple birth in either of the first two parities
  filter(twin_first == 0, twin_second == 0) %>%
  mutate(first_two_same = first_female == second_female)

# Men in the data: keep id, turnout variables and own birth date, then attach
# the child summary for those whose pnr appears as a father id.
fathers <-
  twin_dad %>%
  filter(female == 0) %>%
  select(pnr, stemte_2009, stemt, ep_stemt, date) %>%
  left_join(twin_dad_agg, by = "pnr")

# Write each parent data set to its own .rdata file; the objects are stored
# under the names `mothers` and `fathers`.
save(list = "mothers", file = "mothers_firsttwo.rdata")
save(list = "fathers", file = "fathers_firsttwo.rdata")
